## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## [conflicted] Will prefer dplyr::filter over any other package.
## [conflicted] Will prefer dplyr::select over any other package.
## [conflicted] Will prefer dplyr::slice over any other package.
## [conflicted] Will prefer dplyr::rename over any other package.
## [conflicted] Will prefer dplyr::intersect over any other package.
mytheme = theme_bw(base_size = 10) +
theme(text = element_text(size=10, colour='black'),
title = element_text(size=10, colour='black'),
line = element_line(size=0.5),
axis.title = element_text(size=10, colour='black'),
axis.text = element_text(size=10, colour='black'),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(size=0.5),
strip.background = element_blank(),
strip.text.y = element_blank(),
panel.grid = element_blank(),
panel.border = element_blank(),
legend.text=element_text(size=10)) ## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
counts =
read_tsv('data/D1515T43.R1_rc.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '250_uM_S4U_1', sample_ID = "D1515T43", conc = 250, seq_read = 'R1', replicate = 1) %>%
bind_rows(read_tsv('data/D1515T43.R2.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '250_uM_S4U_1', sample_ID = "D1515T43", conc = 250, seq_read = 'R2', replicate = 1)) %>%
bind_rows(read_tsv('data/D1515T44.R1_rc.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '250_uM_S4U_2', sample_ID = "D1515T44", conc = 250, seq_read = 'R1', replicate = 2)) %>%
bind_rows(read_tsv('data/D1515T44.R2.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '250_uM_S4U_2', sample_ID = "D1515T44", conc = 250, seq_read = 'R2', replicate = 2)) %>%
bind_rows(read_tsv('data/D1515T45.R1_rc.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '125_uM_S4U_1', sample_ID = "D1515T45", conc = 125, seq_read = 'R1', replicate = 1)) %>%
bind_rows(read_tsv('data/D1515T45.R2.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '125_uM_S4U_1', sample_ID = "D1515T45", conc = 125, seq_read = 'R2', replicate = 1)) %>%
bind_rows(read_tsv('data/D1515T46.R1_rc.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '125_uM_S4U_2', sample_ID = "D1515T46", conc = 125, seq_read = 'R1', replicate = 2)) %>%
bind_rows(read_tsv('data/D1515T46.R2.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '125_uM_S4U_2', sample_ID = "D1515T46", conc = 125, seq_read = 'R2', replicate = 2)) %>%
bind_rows(read_tsv('data/D1515T47.R1_rc.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '0_uM_S4U_2', sample_ID = "D1515T47", conc = 0, seq_read = 'R1', replicate = 2)) %>%
bind_rows(read_tsv('data/D1515T47.R2.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '0_uM_S4U_2', sample_ID = "D1515T47", conc = 0, seq_read = 'R2', replicate = 2)) %>%
bind_rows(read_tsv('data/D1515T48.R1_rc.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '0_uM_S4U_3', sample_ID = "D1515T48", conc = 0, seq_read = 'R1', replicate = 3)) %>%
bind_rows(read_tsv('data/D1515T48.R2.fastq_slamdunk_mapped_filtered_tcount.tsv', skip = 2) %>%
mutate(sample = '0_uM_S4U_3', sample_ID = "D1515T48", conc = 0, seq_read = 'R2', replicate = 3))## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 276905 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Chromosome, Name, Strand
## dbl (13): Start, End, Length, ConversionRate, ReadsCPM, Tcontent, CoverageOn...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
remove all transcripts that were not found
add annotation
## Rows: 276905 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): chr, transcript_id, strand, transcript_type
## dbl (3): start, end, score
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
counts = counts %>%
rename('chr' = Chromosome,
'start' = Start,
'end' = End,
'transcript_id' = 'Name',
'strand' = Strand) %>%
left_join(anno) ## Joining with `by = join_by(chr, start, end, transcript_id, strand)`
simplify gene types
counts = counts %>%
mutate(transcript_type_cat = ifelse(transcript_type == 'protein_coding', 'protein_coding', 'other'),
transcript_type_cat = ifelse(transcript_type == 'lncRNA', 'lncRNA', transcript_type_cat))
counts$transcript_type_cat = factor(counts$transcript_type_cat, levels = c('protein_coding', 'lncRNA', 'other'))
countshow many transcript have at least one T > C conversion
## [1] 88.46036
bar plot
counts %>%
group_by(sample, conc) %>%
summarise(perc_conversed = sum(ConversionRate > 0.00) / n()) %>%
mutate(conc = paste(conc, 'µM', sep = ' ')) %>%
ggplot(aes(sample, perc_conversed, fill = conc)) +
geom_bar(stat = 'identity') +
scale_y_continuous(labels = scales::percent_format()) +
mytheme_discrete_x +
scale_fill_manual(values = c('lightcoral', 'deepskyblue', 'forestgreen'))## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.
bar plot per read
perc_conversed = counts %>%
group_by(sample, seq_read, conc) %>%
summarise(nr_transcripts = n(),
nr_conversed = sum(ConversionsOnTs > 0),
perc_conversed = nr_conversed/nr_transcripts)## `summarise()` has grouped output by 'sample', 'seq_read'. You can override
## using the `.groups` argument.
perc_conversed %>%
ggplot(aes(sample, perc_conversed, fill = seq_read)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_y_continuous(labels = scales::percent_format()) +
mytheme_discrete_x +
facet_wrap(~conc, scales = 'free_x')with filter on minimal conversion rate
perc_conversed = counts %>%
group_by(sample, seq_read, conc) %>%
summarise(nr_transcripts = n(),
nr_conversed = sum(ConversionsOnTs > 0),
nr_conversed = sum(ConversionRate > 0.01),
perc_conversed = nr_conversed/nr_transcripts) ## `summarise()` has grouped output by 'sample', 'seq_read'. You can override
## using the `.groups` argument.
perc_conversed %>%
ggplot(aes(sample, perc_conversed, fill = seq_read)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_y_continuous(labels = scales::percent_format()) +
mytheme_discrete_x +
facet_wrap(~conc, scales = 'free_x')how many labeled per gene category
perc_conversed = counts %>%
#filter(ReadCount > 99) %>%
group_by(sample, seq_read, conc, transcript_type_cat) %>%
summarise(nr_transcripts = n(),
nr_conversed = sum(ConversionsOnTs > 0),
perc_conversed = nr_conversed/nr_transcripts) ## `summarise()` has grouped output by 'sample', 'seq_read', 'conc'. You can
## override using the `.groups` argument.
perc_conversed %>%
ggplot(aes(sample, perc_conversed, fill = transcript_type_cat)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_y_continuous(labels = scales::percent_format()) +
mytheme_discrete_x +
facet_wrap(~conc, scales = 'free_x')with filter on minimal conversion rate
perc_conversed = counts %>%
#filter(ReadCount > 99) %>%
group_by(sample, seq_read, conc, transcript_type_cat) %>%
summarise(nr_transcripts = n(),
nr_conversed = sum(ConversionsOnTs > 0),
nr_conversed = sum(ConversionRate > 0.01),
perc_conversed = nr_conversed/nr_transcripts) ## `summarise()` has grouped output by 'sample', 'seq_read', 'conc'. You can
## override using the `.groups` argument.
perc_conversed %>%
ggplot(aes(sample, perc_conversed, fill = transcript_type_cat)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_y_continuous(labels = scales::percent_format()) +
mytheme_discrete_x +
facet_wrap(~conc, scales = 'free_x')median conversion rate
## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.
bar plot
counts %>%
mutate(conc = as.character(conc)) %>%
group_by(sample, conc) %>%
summarise(median_conversionrate = median(ConversionRate)) %>%
ggplot(aes(sample, median_conversionrate, fill = conc)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_y_continuous(labels = scales::percent_format()) +
mytheme_discrete_x +
scale_fill_manual(values = c('lightcoral', 'forestgreen', 'deepskyblue')) +
facet_wrap(~conc, scales = 'free_x') +
geom_text(aes(label = sprintf("%0.2f", round(100 * median_conversionrate, digits = 4))), vjust = -0.2)## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.
bar plot with separate seq reads
counts %>%
mutate(conc = as.character(conc),
conc_2 = paste(conc, '_', seq_read, sep = '')) %>%
group_by(sample, seq_read, conc_2, conc) %>%
summarise(median_conversionrate = median(ConversionRate)) %>%
ggplot(aes(sample, median_conversionrate, fill = conc_2)) +
geom_bar(stat = 'identity', position = 'dodge') +
scale_y_continuous(labels = scales::percent_format()) +
mytheme_discrete_x +
scale_fill_manual(values = c('mistyrose1', 'lightcoral', 'lightblue', 'deepskyblue','lightgreen', 'forestgreen')) +
#geom_text(aes(label = sprintf("%0.2f", round(100 * median_conversionrate, digits = 4))), vjust = -0.2) +
facet_wrap(~conc, scales = 'free_x')## `summarise()` has grouped output by 'sample', 'seq_read', 'conc_2'. You can
## override using the `.groups` argument.
boxplot
counts %>%
mutate(conc = as.character(conc)) %>%
ggplot(aes(sample, ConversionRate, fill = conc)) +
geom_boxplot() +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_manual(values = c('lightcoral', 'forestgreen', 'deepskyblue')) +
mytheme_discrete_xboxplot with separate seq reads
counts %>%
mutate(conc = as.character(conc),
conc_2 = paste(conc, '_', seq_read, sep = '')) %>%
ggplot(aes(sample, ConversionRate, fill = conc_2)) +
geom_boxplot() +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_manual(values = c('mistyrose1', 'lightcoral', 'lightblue', 'deepskyblue','lightgreen', 'forestgreen')) +
mytheme_discrete_xdensity plot
counts %>%
mutate(conc = as.character(conc)) %>%
ggplot((aes(ConversionRate, group = sample, color = conc))) +
geom_density() +
scale_x_continuous(labels = scales::percent_format())counts %>%
ggplot(aes(transcript_type_cat, ConversionRate, fill = transcript_type_cat)) +
geom_boxplot() +
facet_wrap(~sample, nrow = 3) +
mytheme_discrete_x +
scale_y_continuous(labels = scales::percent_format())counts %>%
mutate(conc = as.character(conc)) %>%
ggplot(aes(ReadsCPM, ConversionRate, color = conc)) +
geom_point(alpha = 0.2) +
scale_y_continuous(labels = scales::percent_format()) +
scale_x_log10() +
facet_wrap(~sample)## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.